import os
import sys
import logging
import warnings

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller

from sklearn.metrics import roc_auc_score, accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

print("--- STARTING PHASE 4: US MODEL ANALYSIS ---")

# --- 1. Load Your Clean Dataset ---
try:
    # We load the CSV, set the first column (the date) as the index
    model_df = pd.read_csv('GSSI_model_dataset_US.csv', index_col=0, parse_dates=True)
    print("Dataset 'GSSI_model_dataset_US.csv' loaded successfully.")
except FileNotFoundError:
    print("---!!! FATAL ERROR !!!---")
    print("File 'GSSI_model_dataset_US.csv' not found.")
    print("Please run the 'Script for US Model' (v15) first.")
    exit()

# --- 2. Create the 'Lead' Variable ---
# We are testing if the ratio at time 't' predicts a recession at 't+1'
model_df['Recession_Next_Q'] = model_df['NBER_RECESSION'].shift(-1)

# Drop the last row, which will have a NaN from shifting
model_df = model_df.dropna(subset=['Recession_Next_Q'])

# --- 3. Descriptive Visual (The "Money Shot") ---
print("\nGenerating descriptive plot (MX/LX Ratio vs. Recessions)...")
fig, ax1 = plt.subplots(figsize=(14, 7))

# Plot the MX/LX Ratio
ax1.plot(model_df.index, model_df['MX_LX_Ratio'], color='blue', label='MX/LX Ratio (GSSI)')
ax1.set_ylabel('GSSI (MX/LX Ratio)')
ax1.set_xlabel('Year')

# Shade the NBER recession periods
ax1.fill_between(model_df.index, 0, 1, where=model_df['NBER_RECESSION'] == 1,
                   color='red', alpha=0.3, transform=ax1.get_xaxis_transform(), 
                   label='NBER Recession')

ax1.legend(loc='upper left')
plt.title('US GSSI vs. NBER Recessions (2004-2024)')
plt.grid(True)
plt.savefig('GSSI_vs_Recessions_Plot.png')
print("...Plot saved as 'GSSI_vs_Recessions_Plot.png'.")

# --- 4. Model 1: The Full Logit Regression (The Core Test) ---
print("\n--- Running Full Logit Model (Horserace) ---")

# Define Y and X (with controls)
Y = model_df['Recession_Next_Q']
X = model_df[['MX_LX_Ratio', 'VIX', 'SP500_Return']]
X = sm.add_constant(X) # Add an intercept

# Run the full Logit
logit_model = sm.Logit(Y, X).fit(disp=0) # disp=0 hides the running output
print(logit_model.summary())

# --- 5. Model 2: Granger Causality (QJE Standard) ---
print("\n--- Running Granger Causality Test ---")

# We must use stationary data for VAR/Granger
adf_test = adfuller(model_df['MX_LX_Ratio'].dropna())
if adf_test[1] > 0.05: # p-value > 0.05 means non-stationary
    print(f"ADF test p-value: {adf_test[1]:.4f}. Ratio is non-stationary. Using first difference.")
    model_df['Ratio_stationary'] = model_df['MX_LX_Ratio'].diff()
else:
    print(f"ADF test p-value: {adf_test[1]:.4f}. Ratio is stationary. Using as-is.")
    model_df['Ratio_stationary'] = model_df['MX_LX_Ratio']

# Prep data (must be stationary)
var_data = model_df[['GDP_GROWTH', 'Ratio_stationary', 'VIX']].dropna()
var_model = VAR(var_data)
# 4 lags is standard for quarterly data
var_results = var_model.fit(maxlags=4, ic='aic')

# H0: Ratio does NOT Granger-cause GDP_GROWTH
test_result = var_results.test_causality('GDP_GROWTH', 'Ratio_stationary', kind='f')
print(f"\nGranger Test (H0: GSSI does not cause GDP Growth):")
print(f"P-value: {test_result.pvalue:.4f}")

print("\n--- ANALYSIS COMPLETE ---")